Character encoding transformation - basics
- Copyright (C) 2005 Olaf Klein, o.b.klein@gpsbabel.org
+ Copyright (C) 2005-2008 Olaf Klein, o.b.klein@gpsbabel.org
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
return cet_ucs4_to_char(v, vec);
}
+/* =========================================================================== */
+/* %%% UTF-8 string manipulation functions %%% */
+/* =========================================================================== */
+
+/* %%% cet_utf8_strlen %%%
+ *
+ * Counts the characters of a UTF-8 string: every sequence that
+ * cet_utf8_to_ucs4 decodes successfully adds one to the result, while
+ * sequences it rejects are stepped over without being counted.
+ * A NULL string yields 0.
+ */
+int
+cet_utf8_strlen(const char *str)
+{
+    const char *pos;
+    int count = 0;
+
+    if (str == NULL) {
+        return 0;
+    }
+
+    for (pos = str; *pos != '\0'; ) {
+        int step, ucs4;
+
+        if (cet_utf8_to_ucs4(pos, &step, &ucs4) == CET_SUCCESS) {
+            count++;
+        }
+        /* advance by the size reported by the decoder, valid or not */
+        pos += step;
+    }
+    return count;
+}
+
+/* %%% cet_utf8_strdup %%%
+ *
+ * Checks and duplicates a complete UTF-8 string by handing it to
+ * cet_utf8_strndup with strlen(str) as the limit.
+ * Returns NULL when str is NULL.
+ */
+char *
+cet_utf8_strdup(const char *str)
+{
+    if (str == NULL) {
+        return NULL;
+    }
+    return cet_utf8_strndup(str, strlen(str));
+}
+
+/* %%% cet_utf8_strndup %%%
+ *
+ * Checks and duplicates at most maxlen characters of a UTF-8 string.
+ * Each character that cet_utf8_to_ucs4 decodes successfully is written
+ * back with cet_ucs4_to_utf8; sequences the decoder rejects are left
+ * out of the copy. The result comes from xstrdup.
+ * Returns NULL when str is NULL.
+ */
+char *
+cet_utf8_strndup(const char *str, const int maxlen)
+{
+    const char *src;
+    char *copy, *dst;
+    int count = 0;
+
+    if (str == NULL) {
+        return NULL;
+    }
+
+    /* working buffer with the same size as the input */
+    copy = xstrdup(str);
+    dst = copy;
+
+    for (src = str; (*src != '\0') && (count < maxlen); ) {
+        int step, ucs4;
+
+        if (cet_utf8_to_ucs4(src, &step, &ucs4) == CET_SUCCESS) {
+            dst += cet_ucs4_to_utf8(dst, 6, ucs4);
+            count++;
+        }
+        src += step;
+    }
+    *dst = '\0';
+
+    /* bytes consumed and bytes produced differ: return a right-sized copy */
+    if ((src - str) != (dst - copy)) {
+        char *exact = xstrdup(copy);
+
+        xfree(copy);
+        copy = exact;
+    }
+
+    return copy;
+}
+
/* =========================================================================== */
/* %%% full string transformation %%% */
/* =========================================================================== */
return res;
}
+
+/* %%% cet_str_any_to_uni %%%
+ *
+ * Converts a string in given character set to a 'wide string' (unicode).
+ *
+ * src:    string to convert
+ * vec:    character set of src; when vec->ucs4_count is 0 the input is
+ *         taken as UTF-8 and only cleaned via cet_utf8_strdup
+ * length: may be NULL; otherwise receives the number of characters
+ *         (the terminating 0 is not counted)
+ *
+ * Returns a buffer obtained from xcalloc that holds one 16-bit value per
+ * character, written with le_write16, followed by a terminating 0.
+ * NOTE(review): each value goes through le_write16, so code points
+ * above 0xFFFF presumably do not survive - confirm against le_write16.
+ */
+short *
+cet_str_any_to_uni(const char *src, const cet_cs_vec_t *vec, int *length)
+{
+    char *utf8;
+    int len;
+    short *res, *sout;
+
+    if (vec->ucs4_count == 0) {
+        utf8 = cet_utf8_strdup(src);    /* UTF-8 -> clean UTF-8 */
+    }
+    else {
+        utf8 = cet_str_any_to_utf8(src, vec);
+    }
+
+    len = cet_utf8_strlen(utf8);
+    res = sout = xcalloc(2, len + 1);
+
+    if (len) {
+        char *cin = utf8;
+
+        while (*cin) {
+            int bytes, value;
+
+            if (CET_SUCCESS == cet_utf8_to_ucs4(cin, &bytes, &value)) {
+                le_write16(sout, value);
+                sout++;
+            }
+            cin += bytes;
+        }
+    }
+
+    *sout = 0;
+    if (length) {
+        *length = len;
+    }
+
+    /* The UTF-8 copy is only an intermediate; release it here so it is
+       not leaked on every call. */
+    if (utf8 != NULL) {
+        xfree(utf8);
+    }
+
+    return res;
+}
Character encoding transformation - basics header
- Copyright (C) 2005 Olaf Klein, o.b.klein@gpsbabel.org
+ Copyright (C) 2005-2008 Olaf Klein, o.b.klein@gpsbabel.org
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
char *cet_str_uni_to_utf8(const short *src, const int length);
+/* UTF-8 string manipulation functions */
+
+int cet_utf8_strlen(const char *str);
+char *cet_utf8_strdup(const char *str);
+char *cet_utf8_strndup(const char *str, const int maxlen);
+
+/* unicode functions */
+
+short *cet_str_any_to_uni(const char *src, const cet_cs_vec_t *vec, int *length);
+
#endif
Character encoding transformation - utilities
- Copyright (C) 2005,2006,2007 Olaf Klein, o.b.klein@gpsbabel.org
+ Copyright (C) 2005-2008 Olaf Klein, o.b.klein@gpsbabel.org
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
return cet_str_any_to_utf8(src, &cet_cs_vec_cp1252);
}
+/* Converts a UTF-8 string to a 'wide string' by calling
+ * cet_str_any_to_uni with the UTF-8 character set vector;
+ * length is passed through unchanged. */
+short *
+cet_str_utf8_to_uni(const char *src, int *length)
+{
+    short *wide = cet_str_any_to_uni(src, &cet_cs_vec_utf8, length);
+
+    return wide;
+}
/* helpers */
ac = 0;
- fprintf(fout, "GPSbabel builtin character sets: (-c option)\n");
+ fprintf(fout, "GPSBabel builtin character sets: (-c option)\n");
for (i = 0; i < c; i++)
{
char **a;
Character encoding transformation - utilities header
- Copyright (C) 2005 Olaf Klein, o.b.klein@gpsbabel.org
+ Copyright (C) 2005-2008 Olaf Klein, o.b.klein@gpsbabel.org
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
char *cet_str_us_ascii_to_utf8(const char *src);
extern cet_cs_vec_t cet_cs_vec_ansi_x3_4_1968;
+short *cet_str_utf8_to_uni(const char *src, int *length);
extern cet_cs_vec_t cet_cs_vec_utf8;